notebook.community

Edit and run



In [2]:

    
import pandas as pd



In [1]:

    
import numpy as np
from collections import OrderedDict
from bokeh.charts import Scatter
from bokeh.charts import Bar
from bokeh.charts import Histogram
from bokeh.plotting import output_notebook, show
output_notebook()
import datavis as dv









    




    
        
        
        
    
        
        BokehJS successfully loaded.



In [61]:

    
from bokeh.palettes import brewer
from bokeh.models import LinearAxis, Range1d
from bokeh.plotting import *



In [4]:

    
gfti = pd.read_pickle("../CDPdata/gfti.pkl")



In [265]:

    
gfti = pd.read_pickle("../CDPdata/profiles2.pkl")



In [266]:

    
gfti.columns









    Out[266]:





Index([u'year', u'Organisation', u'Country', u'Industry', u'Sector', u'scope1', u'scope2', u'Revenues', u'COGS', u'Equity', u'PPE', u'Assets', u'Income', u's1and2total', u's1and2intensity', u'hasabsolute', u'hasintensity', u'target type', u'lnintensity', u'pCOGS', u'pPPE', u'ROE', u'pcintensity', u'plateau', u'steady', u'Industrials', u'Consumer Discretionary', u'Materials', u'Information Technology', u'Consumer Staples', u'Utilities', u'Energy', u'ConsumerDiscretionary', u'InformationTechnology', u'ConsumerStaples', u'USA', u'UnitedKingdom', u'Japan', u'SouthAfrica', u'France', u'SouthKorea', u'Germany', u'country', u'lnCOGS', u'lnPPE'], dtype='object')

graph emissions reported totals by country and sector



In [267]:

    
# need scopeg to get counts and totals
# screen out 2008
sc = gfti.set_index("Organisation")
sc = sc[sc["year"].isin(range(2009,2014))]
scopeg = sc.groupby("year")



In [268]:

    
# get the counts
d = scopeg.describe()
d = d.reset_index()
scope12_dis = d[d["level_1"]=="count"][["year", "scope1"]]
scope12_dis.rename(columns={"scope1": "n"}, inplace=True)



In [269]:

    
t = scopeg.sum().reset_index() # .rename(columns={"scope1": "totaltons"})
t.set_index("year", inplace=True)
# tons to Mt CO2e
t["scope1"] = t["scope1"]/1000000
t["scope2"] = t["scope2"]/1000000
scope12_dis.set_index("year", inplace=True)
scope12_dis = scope12_dis.join(t)



In [270]:

    
scope12_dis









    Out[270]:






  
    
      
      n
      scope1
      scope2
      Revenues
      COGS
      Equity
      PPE
      Assets
      Income
      s1and2total
      ...
      ConsumerStaples
      USA
      UnitedKingdom
      Japan
      SouthAfrica
      France
      SouthKorea
      Germany
      lnCOGS
      lnPPE
    
    
      year
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      2009
        629
       2541.707460
       585.404580
       6.413210e+08
       4.484680e+08
       3.020914e+08
       2.617664e+08
       7.319289e+08
       31520976.299614
       3.127112e+09
      ...
        68
       215
        86
        59
       25
       28
       24
       28
      -inf
      -inf
    
    
      2010
        787
       3410.717664
       671.261160
       8.225650e+08
       5.732981e+08
       4.009627e+08
       3.183774e+08
       8.935984e+08
       55229269.037530
       4.081979e+09
      ...
        84
       241
       101
        92
       39
       32
       30
       36
      -inf
      -inf
    
    
      2011
        926
       3896.411890
       749.143890
       9.241278e+08
       6.576092e+08
       4.659793e+08
       3.760603e+08
       1.019032e+09
       54907687.240198
       4.645556e+09
      ...
        94
       278
       111
       101
       43
       36
       37
       35
      -inf
      -inf
    
    
      2012
       1068
       4323.619389
       789.097193
       1.084900e+09
       7.813952e+08
       5.554828e+08
       5.127672e+08
       1.288933e+09
       46439420.963059
       5.112717e+09
      ...
       108
       312
       125
       116
       46
       45
       47
       40
      -inf
      -inf
    
    
      2013
       1075
       4135.568080
       787.157610
       1.299321e+09
       9.287255e+08
       6.690813e+08
       6.536406e+08
       1.629194e+09
       63384044.558930
       4.922726e+09
      ...
       107
       313
       125
       112
       47
       49
       50
       39
      -inf
      -inf
    
  

5 rows × 39 columns



In [243]:

    
years = []
for i in scope12_dis.index:
    years.append(str(i))



In [244]:

    
values12 = OrderedDict()
values12["Companies That Disclosed That Year"] = scope12_dis["n"].values
# values12["Scope 1"]= scope12_dis["scope1"].values
# values12["Scope 2"]= scope12_dis["scope2"].values



In [245]:

    
bar = Bar(values12, years, title="Companies in the Sample", filename="scope12_disclosure.html",
           stacked=False, ylabel="n", xlabel="Year", palette=brewer["Spectral"][7])
#yend = 10000
# bar.y_range.end = 6000
# bar.y_range.end = 2000
#bar.extra_y_ranges = {"Mtons": Range1d(start=0, end=yend)}
# bar.add_layout(LinearAxis(y_range_name="Mtons", axis_label="Mt CO2e"), 'right')
# bar.show()



In [246]:

    
show(bar)



In [271]:

    
scope12 = sc



In [438]:

    
sectors_groups = scope12.reset_index().groupby("Sector")
sector_ns = sectors_groups.apply(lambda g: len(g["Organisation"].value_counts().index))



In [440]:

    
sector_ns.to_csv("../CDPdata/sectorns.csv")



In [441]:

    
c_groups = scope12.reset_index().groupby("Country")
c_ns = c_groups.apply(lambda g: len(g["Organisation"].value_counts().index))



In [442]:

    
c_ns.to_csv("../CDPdata/countryns.csv")

graph things



In [364]:

    
scope12["Sector"] = scope12["Sector"].apply(lambda x: "Telecoms" if x=="Telecommunication Services" else x)



In [366]:

    
scope12["scope1"] = scope12["scope1"]/1000000
scope12["scope2"] = scope12["scope2"]/1000000



In [367]:

    
scope12_cs = scope12.groupby(["year","Country","Sector"])
scope12_c = scope12.groupby(["year", "Country"])
scope12_s = scope12.groupby(["year", "Sector"])



In [368]:

    
# get sums of emissions disclosed
scope12_csums = scope12_c.sum().reset_index().set_index('year')
scope12_ssums = scope12_s.sum().reset_index().set_index('year')



In [369]:

    
# get count of number of companies that reported
d = scope12_s.describe()
d = d.reset_index()
scope12_sdis = d[d["level_2"]=="count"][["year", "Sector","scope1", "scope2"]]
scope12_sdis.set_index("year",inplace=True)



In [370]:

    
# get count of number of companies that reported by country
d = scope12_c.describe()
d = d.reset_index()
scope12_cdis = d[d["level_2"]=="count"][["year", "Country","scope1", "scope2"]]
scope12_cdis.set_index("year",inplace=True)



In [371]:

    
scope12_sdis.head()









    Out[371]:






  
    
      
      Sector
      scope1
      scope2
    
    
      year
      
      
      
    
  
  
    
      2009
       Consumer Discretionary
       88
       88
    
    
      2009
             Consumer Staples
       68
       68
    
    
      2009
                       Energy
       31
       31
    
    
      2009
                   Financials
       30
       30
    
    
      2009
                  Health Care
       40
       40



In [372]:

    
g = scope12.reset_index().groupby("Country").sum().sort("scope1", ascending=0)
h = scope12.reset_index().groupby("Sector").sum().sort("scope1", ascending=0)
countries = g.index.tolist()
sectors = h.index.tolist()



In [373]:

    
sectors = scope12_sdis.loc[2010].sort("scope2", ascending= 0)["Sector"].tolist()
# sectors = scope12_sdis["Sector"].value_counts().index



In [390]:

    
countries = scope12_cdis.loc[2010].sort("scope2", ascending= 0)["Country"].tolist()



In [392]:

    
# run for sums data
scope1_csums_cats = dv.separate_cats(scope12_csums, "Country", countries, "scope1").fillna(0)
scope1_ssums_cats = dv.separate_cats(scope12_ssums, "Sector", sectors, "scope1").fillna(0)
scope2_csums_cats = dv.separate_cats(scope12_csums, "Country", countries, "scope2").fillna(0)
scope2_ssums_cats = dv.separate_cats(scope12_ssums, "Sector", sectors, "scope2").fillna(0)



In [401]:

    
scope12_cdis_cats









    Out[401]:






  
    
      
      USA
      United Kingdom
      Japan
      South Africa
      Germany
      France
      South Korea
      Sweden
      Switzerland
      Spain
      ...
      New Zealand
      Russia
      Israel
      Colombia
      Luxembourg
      Chile
      Singapore
      Austria
      Hungary
      Cyprus
    
    
      year
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      2009
       215
        86
        59
       25
       28
       28
       24
       23
       14
       15
      ...
       1
       1
       1
       1
       1
       1
       0
       1
       0
       0
    
    
      2010
       241
       101
        92
       39
       36
       32
       30
       23
       18
       18
      ...
       2
       2
       2
       2
       2
       1
       1
       1
       1
       1
    
    
      2011
       278
       111
       101
       43
       35
       36
       37
       26
       20
       21
      ...
       4
       2
       2
       2
       2
       1
       1
       4
       1
       1
    
    
      2012
       312
       125
       116
       46
       40
       45
       47
       30
       24
       22
      ...
       6
       2
       2
       2
       2
       2
       2
       6
       1
       0
    
    
      2013
       313
       125
       112
       47
       39
       49
       50
       31
       25
       23
      ...
       6
       1
       2
       3
       2
       2
       2
       6
       1
       1
    
  

5 rows × 34 columns



In [393]:

    
# can run for dis(closure) or sums data
scope12_cdis_cats = dv.separate_cats(scope12_cdis, "Country", countries, "scope1").fillna(0)
scope12_sdis_cats = dv.separate_cats(scope12_sdis, "Sector", sectors, "scope1").fillna(0)



In [111]:

    
# for stacked area chart

# max number of brewer colors is 11
s1_csums_vals = dv.stacked_cols(scope1_csums_cats, countries)
# s1_ssums_vals = dv.stacked_cols(scope1_ssums_cats, sectors)
# colors = brewer["Spectral"][len(sectors)]
colors = brewer["Spectral"][11]
x2 = np.hstack((range(2009,2014)[::-1], range(2009,2014)))

title = "Scope 1 CO2e Disclosed by Country"
# title = "Scope 1 CO2e Disclosed by Sector"
# title = "Companies Disclosing Scope 1 by Country"
s = figure(title = title)
s.patches([x2 for a in s1_csums_vals], list(s1_csums_vals.values()),
          color=colors, alpha=0.8, line_color=None)









    Out[111]:





<bokeh.plotting.Figure at 0x7faa07eacbd0>



In [385]:

    
s1_csums_vs = dv.prep_stacked_bar(scope1_csums_cats, countries)
s2_csums_vs = dv.prep_stacked_bar(scope2_csums_cats, countries)
s1_ssums_vs = dv.prep_stacked_bar(scope1_ssums_cats, sectors)
s2_ssums_vs = dv.prep_stacked_bar(scope2_ssums_cats, sectors)



In [402]:

    
s12_cdis_vs = dv.prep_stacked_bar(scope12_cdis_cats, countries)
s12_sdis_vs = dv.prep_stacked_bar(scope12_sdis_cats, sectors)



In [384]:

    
reload(dv)









    Out[384]:





<module 'datavis' from 'datavis.py'>



In [394]:

    
s1cbar = Bar(s1_csums_vs, years + ["", ""], title="Scope 1 CO2e Disclosed by Country", legend="bottom_right",
           stacked=True, ylabel="Mt CO2e", xlabel="Year", palette=brewer["Spectral"][11])

s2cbar = Bar(s2_csums_vs, years + ["", ""], title="Scope 2 CO2e Disclosed by Country", legend="bottom_right",
           stacked=True, ylabel="Mt CO2e", xlabel="Year", palette=brewer["Spectral"][11])

s1sbar = Bar(s1_ssums_vs, years + ["", ""], title="Scope 1 CO2e Disclosed by Sector", legend="bottom_right",
           stacked=True, ylabel="Mt CO2e", xlabel="Year", palette=brewer["Spectral"][11])

s2sbar = Bar(s2_ssums_vs, years + ["", ""], title="Scope 2 CO2e Disclosed by Sector", legend="bottom_right",
           stacked=True, ylabel="Mt CO2e", xlabel="Year", palette=brewer["Spectral"][11])



In [403]:

    
s12cdbar = Bar(s12_cdis_vs, years + ["", ""], title="Companies That Disclosed by Country", legend="bottom_right",
           stacked=True, ylabel="n", xlabel="Year", palette=brewer["Spectral"][11])

s12sdbar = Bar(s12_sdis_vs, years + ["", ""], title="Companies That Disclosed by Sector", legend="bottom_right",
           stacked=True, ylabel="n", xlabel="Year", palette=brewer["Spectral"][11])



In [400]:

    
print countries









    



[u'USA', u'United Kingdom', u'Japan', u'South Africa', u'Germany', u'France', u'South Korea', u'Sweden', u'Switzerland', u'Spain', u'Brazil', u'Taiwan', u'Finland', u'Norway', u'India', u'Denmark', u'Italy', u'Netherlands', u'Belgium', u'Portugal', u'Australia', u'Ireland', u'Hong Kong', u'Turkey', u'New Zealand', u'Russia', u'Israel', u'Colombia', u'Luxembourg', u'Chile', u'Singapore', u'Austria', u'Hungary', u'Cyprus']



In [404]:

    
show(s12cdbar)

histograms of info for each year



In [283]:

    
gftip=gfti[~gfti["pcintensity"].isnull()]



In [287]:

    
gftip["pc100"] = gftip["pcintensity"]*100









    



-c:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [300]:

    
summary = gftip[['year','pc100']].groupby("year").describe()



In [312]:

    
mean = summary.reset_index().set_index(["level_1","year"]).sort_index().loc["mean"]
median = summary.reset_index().set_index(["level_1","year"]).sort_index().loc["50%"]



In [314]:

    
mean.rename(columns={"pc100":"mean"}, inplace=True)
median.rename(columns={"pc100":"median"}, inplace=True)



In [320]:

    
pc_avgs= mean.join(median).reset_index()



In [321]:

    
pc_avgs.to_csv("../CDPdata/pc_avgs.csv")



In [25]:

    
gftip = dv.prep_forhist(gftip, "pcintensity", -1, 1)



In [322]:

    
gftip = dv.prep_forhist(gftip, "pc100", -100, 100)



In [323]:

    
gftip.set_index("year",inplace=True)



In [33]:

    
plateaus = gftip[gftip["plateau"]]
steady = gftip[gftip["steady"]]



In [36]:

    
plateau_values = OrderedDict()
for yr in range(2010,2014):
    plateau_values[str(yr)] = plateaus.loc[yr]["pcintensity"].tolist()



In [37]:

    
steady_values = OrderedDict()
for yr in range(2010,2014):
    steady_values[str(yr)] = steady.loc[yr]["pcintensity"].tolist()



In [324]:

    
intensity_values = OrderedDict()
for yr in range(2010,2014):
    intensity_values[str(yr)] = gftip.loc[yr]["pc100"].tolist()



In [408]:

    
len(intensity_values["2013"])









    Out[408]:





1047



In [350]:

    
colors = brewer["BuPu"][5][0:4]
colors.reverse()



In [425]:

    
# fname = "2012 had inttarget vs int change.html"
title =  "Annual Percent Intensity Change"
fname = "12intchangeyear.html"
hist = Histogram(intensity_values, bins=40, filename=fname, title = title, ylabel = "Density", xlabel = "% Change",legend=True)
# hist = Histogram(hiall_values, bins=30, filename=fname, title = title, ylabel = "", xlabel = "",legend=True)



In [426]:

    
show(hist)



In [ ]:

	n	scope1	scope2	Revenues	COGS	Equity	PPE	Assets	Income	s1and2total	...	ConsumerStaples	USA	UnitedKingdom	Japan	SouthAfrica	France	SouthKorea	Germany	lnCOGS	lnPPE
year
2009	629	2541.707460	585.404580	6.413210e+08	4.484680e+08	3.020914e+08	2.617664e+08	7.319289e+08	31520976.299614	3.127112e+09	...	68	215	86	59	25	28	24	28	-inf	-inf
2010	787	3410.717664	671.261160	8.225650e+08	5.732981e+08	4.009627e+08	3.183774e+08	8.935984e+08	55229269.037530	4.081979e+09	...	84	241	101	92	39	32	30	36	-inf	-inf
2011	926	3896.411890	749.143890	9.241278e+08	6.576092e+08	4.659793e+08	3.760603e+08	1.019032e+09	54907687.240198	4.645556e+09	...	94	278	111	101	43	36	37	35	-inf	-inf
2012	1068	4323.619389	789.097193	1.084900e+09	7.813952e+08	5.554828e+08	5.127672e+08	1.288933e+09	46439420.963059	5.112717e+09	...	108	312	125	116	46	45	47	40	-inf	-inf
2013	1075	4135.568080	787.157610	1.299321e+09	9.287255e+08	6.690813e+08	6.536406e+08	1.629194e+09	63384044.558930	4.922726e+09	...	107	313	125	112	47	49	50	39	-inf	-inf

	Sector	scope1	scope2
year
2009	Consumer Discretionary	88	88
2009	Consumer Staples	68	68
2009	Energy	31	31
2009	Financials	30	30
2009	Health Care	40	40